# -*- coding: utf-8 -*-
import math
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider
from njsc.rules import *


class MroSpider(BaseSpider):
    """Spider that crawls supplier SKU listings from www.njsc365.com.

    Flow:
      1. ``start_requests`` fetches the navigation menu JSON.
      2. ``parse_nav`` extracts the suppliers from it, emits them as a
         ``Box("supplier", ...)`` item and requests page 1 of each supplier's
         SKU gallery.
      3. ``parse_mro_sku`` emits each page of SKUs as a ``Box("sku", ...)``
         item and, from page 1 only, schedules the remaining pages.
    """
    name = 'mro'
    custom_settings = {
        'CONCURRENT_REQUESTS': 8,
        # 'DOWNLOAD_DELAY': 0.3,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 6,
        'CONCURRENT_REQUESTS_PER_IP': 6,
    }

    # Common URLs
    nav_url = 'https://www.njsc365.com/static/navMenu.json'
    # Placeholders, in order: supplier id, 1-based page number.
    sku_list_url = 'https://www.njsc365.com/api/index.php/seller-gallery-{}.html?output=json&page={}'

    # Browser-like headers sent with every request issued by this spider.
    _REQUEST_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh_CN',
        'Host': 'www.njsc365.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36',
    }

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            batchNo: Batch identifier forwarded to ``BaseSpider``; the rest of
                this class reads it back as ``self.batch_no`` (presumably set
                by the base class -- confirm).
            *args, **kwargs: Forwarded unchanged to ``BaseSpider``.
        """
        super(MroSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Page size used for the pagination math below. The original note says
        # changing it has no effect (presumably the server fixes the page
        # size -- confirm before tuning).
        self.rows = 20

    def _sku_request(self, sp_id, sp_name, page, priority=0):
        """Build the request for one page of a supplier's SKU gallery.

        Args:
            sp_id: Supplier id substituted into ``sku_list_url``.
            sp_name: Supplier name, carried in ``meta`` for the callback.
            page: 1-based page number to fetch.
            priority: Scrapy request priority (0 is Scrapy's default).

        Returns:
            A ``Request`` whose callback is ``parse_mro_sku``.
        """
        return Request(
            url=self.sku_list_url.format(sp_id, page),
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'spId': sp_id,
                'spName': sp_name,
                'page': page,
            },
            # Copy so that no request can mutate the shared class-level dict.
            headers=dict(self._REQUEST_HEADERS),
            callback=self.parse_mro_sku,
            errback=self.error_back,
            priority=priority,
        )

    def _total_pages(self, response):
        """Compute the page count from the ``data.total`` field of a SKU page.

        Returns:
            ``ceil(total / self.rows)``, or 0 when the total is missing, null
            or not an integer (a warning is logged in that case).
        """
        # Imported locally because this module does not import ``json`` at the
        # top; the original relied on a star import to provide it.
        import json

        try:
            data = json.loads(response.text).get('data') or {}
            total = int(data.get('total') or 0)
        except (ValueError, TypeError, AttributeError):
            # ValueError covers both invalid JSON and a non-numeric total.
            self.logger.warning('unreadable total, pagination skipped: url=%s', response.url)
            return 0
        return math.ceil(total / self.rows)

    def start_requests(self):
        """Yield the single seed request for the navigation menu JSON."""
        yield Request(
            url=self.nav_url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            headers=dict(self._REQUEST_HEADERS),
            callback=self.parse_nav,
            errback=self.error_back,
        )

    def parse_nav(self, response):
        """Emit the suppliers found in the menu and request their first pages.

        Yields:
            One ``Box("supplier", ...)`` item holding all parsed suppliers,
            followed by one page-1 SKU request per supplier.
        """
        # NOTE(review): include_ids=['01'] presumably restricts parsing to one
        # menu section -- confirm against njsc.rules.parse_mro_sellers.
        sp_list = parse_mro_sellers(response, include_ids=['01'])
        if not sp_list:
            # Also covers a None result, which previously crashed the loop below.
            self.logger.warning('no suppliers parsed: url=%s', response.url)
            return

        self.logger.info('供应商: cnt=%s', len(sp_list))
        yield Box("supplier", self.batch_no, sp_list)

        for sp in sp_list:
            # First pages are prioritised over the follow-up pages, which use
            # the default priority.
            yield self._sku_request(sp.get('id'), sp.get('name'), 1, priority=100)

    def parse_mro_sku(self, response):
        """Emit the SKUs of one gallery page and fan out from page 1.

        Yields:
            A ``Box("sku", ...)`` item for a non-empty page. When the response
            is page 1 and holds at least ``self.rows`` entries, also one
            request for each page from 2 up to the computed page count.
        """
        meta = response.meta
        cur_page = meta.get('page')
        sp_id = meta.get('spId')
        sp_name = meta.get('spName')

        # Resolves to the module-level parser function, not this method:
        # class scope is not searched from inside a method body.
        sku_list = parse_mro_sku(response)
        if not sku_list:
            self.logger.info('空页: sp=%s, page=%s', sp_id, cur_page)
            return

        self.logger.info('清单: sp=%s, page=%s, cnt=%s', sp_id, cur_page, len(sku_list))
        yield Box("sku", self.batch_no, sku_list)

        # Only page 1 schedules further pages; a short first page means the
        # supplier has no more pages.
        if cur_page != 1 or len(sku_list) < self.rows:
            return

        total_page = self._total_pages(response)
        self.logger.info('页数: sp=%s, total=%s', sp_id, total_page)
        for page in range(2, total_page + 1):
            yield self._sku_request(sp_id, sp_name, page)
